ArastrejuIndex.java example

Explorer
arastreju-master
/*
 * Copyright (C) 2013 lichtflut Forschungs- und Entwicklungsgesellschaft mbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.arastreju.sge.index;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.arastreju.sge.ConversationContext;
import org.arastreju.sge.inferencing.Inferencer;
import org.arastreju.sge.model.Statement;
import org.arastreju.sge.model.nodes.ResourceNode;
import org.arastreju.sge.naming.QualifiedName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * <p>
 *  Indexer implementation using Lucene
 * </p>
 *
 * <p>
 * 	Created Feb 01, 2013
 * </p>
 *
 * @author Timo Buhrmester
 */
public class ArastrejuIndex implements IndexUpdator, IndexSearcher {

	private static final Logger LOGGER = LoggerFactory.getLogger(ArastrejuIndex.class);

	/** Maximum number of documents written to the log by {@link #dump()}. */
	private static final int MAX_DUMPED_DOCS = 100;

	/** Inferencers consulted for every asserted statement when a document is built. */
	private final List<Inferencer> inferencers = new ArrayList<Inferencer>();

	/** Supplies the primary context that selects the underlying context index. */
	private final ConversationContext conversationContext;

	/** Hands out (and releases) the per-context index. */
	private final IndexProvider provider;

	// ----------------------------------------------------

	/**
	 * Create an index facade bound to a conversation context.
	 * @param cc The conversation context whose primary context selects the index.
	 * @param provider The provider of the context specific indexes.
	 */
	public ArastrejuIndex(ConversationContext cc, IndexProvider provider) {
		this.conversationContext = cc;
		this.provider = provider;
	}

	// ----------------------------------------------------

	/**
	 * Add one or more soft inferencers.
	 * @param inferencer The inferencer(s).
	 * @return This.
	 */
	public ArastrejuIndex add(Inferencer... inferencer) {
		Collections.addAll(inferencers, inferencer);
		return this;
	}

	// ----------------------------------------------------

	/**
	 * Index this node with all its statements, regarding the current primary context.
	 * If the node already has been indexed, it will be updated.
	 * @param node The node to index.
	 * @throws IllegalStateException if the index writer reports an I/O error.
	 */
	@Override
	public void index(ResourceNode node) {
		LOGGER.debug("Indexing ({})", node);

		Document doc = createDocument(node);
		ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
		try {
			// updateDocument replaces the document matching the term, or creates it if nonexistent.
			Term key = new Term(IndexFields.QUALIFIED_NAME, normalizeQN(node.toURI()));
			index.getWriter().updateDocument(key, doc);
			// NOTE: no commit is issued here; XXX to be revised when transactions enter the play.
		} catch (IOException e) {
			String msg = "caught IOException while indexing resource " + node.toURI();
			LOGGER.error(msg, e);
			throw new IllegalStateException(msg, e);
		}
	}

	/**
	 * Remove the resource identified by the qualified name from the index.
	 * @param qn The qualified name.
	 * @throws IllegalStateException if the index writer reports an I/O error.
	 */
	@Override
	public void remove(QualifiedName qn) {
		LOGGER.debug("remove({})", qn);
		ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
		try {
			index.getWriter().deleteDocuments(new Term(IndexFields.QUALIFIED_NAME, normalizeQN(qn.toURI())));
			// NOTE: no commit is issued here (see index()).
		} catch (IOException e) {
			// Pass the exception itself so the stack trace is not lost in the log.
			LOGGER.error("Could not remove node '" + qn + "' from index", e);
			throw new IllegalStateException("Could not remove node.", e);
		}
	}

	/**
	 * Run a query in Lucene query parser syntax against the index of the primary context
	 * and collect all hits.
	 * @param query The query string.
	 * @return The search result wrapping the qualified names of all hits.
	 * @throws IllegalStateException if the query can not be parsed or the search fails
	 * 	with an I/O error.
	 */
	@Override
	public IndexSearchResult search(String query) {
		LOGGER.debug("search({})", query);
		ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
		org.apache.lucene.search.IndexSearcher searcher = index.getSearcher();

		/* default field is 'qn' as this is the only field common to all resources.
		 * (not that we're going to need a default field, anyway.) */
		QueryParser qp = new QueryParser(Version.LUCENE_35, IndexFields.QUALIFIED_NAME, new LowercaseWhitespaceAnalyzer(Version.LUCENE_35));
		qp.setAllowLeadingWildcard(true); //such queries should be avoided where possible nevertheless

		List<QualifiedName> resultList;
		try {
			/* a collector is used (instead of a top-n search) because all hits are needed */
			AllHitsCollector collector = new AllHitsCollector();
			searcher.search(qp.parse(query), collector);

			resultList = collector.getList();
		} catch (IOException e) {
			LOGGER.error("Caught IOException while processing query '" + query + "'", e);
			throw new IllegalStateException("Could not perform search.", e);
		} catch (ParseException e) {
			LOGGER.error("Caught ParseException while processing query '" + query + "'", e);
			throw new IllegalStateException("Could not perform search.", e);
		}

		return new FixedIndexSearchResult(resultList);
	}

	// ----------------------------------------------------

	/**
	 * Log (level INFO) the fields of up to {@value #MAX_DUMPED_DOCS} documents of the
	 * index of the primary context. Intended for debugging.
	 * @throws RuntimeException if reading the index fails with an I/O error.
	 */
	public void dump() {
		ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
		org.apache.lucene.search.IndexSearcher searcher = index.getSearcher();
		IndexReader reader = searcher.getIndexReader();

		try {
			TopDocs top = searcher.search(new MatchAllDocsQuery(), MAX_DUMPED_DOCS);
			// totalHits counts ALL matches, while scoreDocs only holds the requested top hits;
			// so the loop has to be bounded by the array length.
			for (int i = 0; i < top.scoreDocs.length; i++) {
				int docId = top.scoreDocs[i].doc;
				Document doc = reader.document(docId);
				LOGGER.info("---Document--- id: {}", docId);
				List<Fieldable> fields = doc.getFields();
				for (Fieldable f : fields) {
					LOGGER.info("\tField: name='{}', val='{}'", f.name(), f.stringValue());
				}
			}
		} catch (IOException e) {
			String msg = "caught IOException while dumping index";
			LOGGER.error(msg, e);
			throw new RuntimeException(msg, e);
		}
	}

	/**
	 * Release the index of the primary context at the provider and close its reader and writer.
	 * No more calls to this object after close().
	 * @throws RuntimeException if closing the reader or the writer fails with an I/O error.
	 */
	public void close() {
		ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
		provider.release(conversationContext.getPrimaryContext());
		try {
			try {
				index.getReader().close();
			} finally {
				// close the writer even if closing the reader failed
				index.getWriter().close();
			}
		} catch (IOException e) {
			String msg = "caught IOException while closing reader/writer";
			LOGGER.error(msg, e);
			throw new RuntimeException(msg, e);
		}
	}

	/**
	 * Delete all documents from the index of the primary context.
	 * @throws RuntimeException if the index writer reports an I/O error.
	 */
	public void clear() {
		ContextIndex index = provider.forContext(conversationContext.getPrimaryContext());
		try {
			index.getWriter().deleteAll();
			// NOTE: no commit is issued here (see index()).
		} catch (IOException e) {
			String msg = "caught IOException while clearing index";
			LOGGER.error(msg, e);
			throw new RuntimeException(msg, e);
		}
	}

	// ----------------------------------------------------

	/**
	 * Build the Lucene document for a node: one field for the qualified name plus the
	 * fields for all asserted statements and for all statements the registered
	 * inferencers derive from them.
	 * @param node The node to convert.
	 * @return The document.
	 */
	private Document createDocument(ResourceNode node) {
		Document doc = new Document();
		// NOTE(review): the URI is added as-is and analyzed, while lookups use the lowercased
		// term (see normalizeQN) - presumably the writer's analyzer lowercases; to be confirmed.
		doc.add(new Field(IndexFields.QUALIFIED_NAME, node.toURI(), Store.YES, Index.ANALYZED));

		Set<Statement> asserted = node.getAssociations();
		Set<Statement> inferred = new HashSet<Statement>();
		for (Statement stmt : asserted) {
			for (Inferencer inferencer : inferencers) {
				inferencer.addInferenced(stmt, inferred);
			}
			addFields(doc, stmt);
		}
		// inferred statements are added after all asserted ones have been processed
		for (Statement stmt : inferred) {
			addFields(doc, stmt);
		}
		return doc;
	}

	/**
	 * Add the predicate specific field and - unless the document already contains an equal
	 * value for it - the generic relation/value field of a statement.
	 * @param doc The document to extend.
	 * @param stmt The statement.
	 */
	private void addFields(Document doc, Statement stmt) {
		doc.add(makeField(stmt));
		Field f = makeGenField(stmt);
		if (!findValue(doc, f.name(), f.stringValue())) {
			doc.add(f);
		}
	}

	/**
	 * Create the generic field of a statement, which is independent of the predicate:
	 * a resource relation field for resource objects, a resource value field otherwise.
	 * @param stmt The statement.
	 * @return The field.
	 */
	private Field makeGenField(Statement stmt) {
		if (stmt.getObject().isResourceNode()) {
			return new Field(IndexFields.RESOURCE_RELATION, stmt.getObject().asResource().toURI(), Store.YES, Index.ANALYZED);
		}
		return new Field(IndexFields.RESOURCE_VALUE, stmt.getObject().asValue().getStringValue(), Store.YES, Index.ANALYZED); //analyzed, right?
	}

	/**
	 * Create the predicate specific field of a statement: the field is named by the
	 * predicate's URI and holds the object's URI or string value.
	 * @param stmt The statement.
	 * @return The field.
	 */
	private Field makeField(Statement stmt) {
		if (stmt.getObject().isResourceNode()) {
			return new Field(stmt.getPredicate().toURI(), stmt.getObject().asResource().toURI(), Store.YES, Index.ANALYZED);
		}
		/* This replicates the behaviour of the old neo index, for now.
		 * TODO: Should probably use different sorts of fields  (like
		 * NumericField) where applicable to leverage more of lucenes functionality */
		return new Field(stmt.getPredicate().toURI(), stmt.getObject().asValue().getStringValue(), Store.YES, Index.ANALYZED); //analyzed, right?
	}

	/**
	 * Check if the document already contains a field with the given name and value.
	 * @param doc The document.
	 * @param fieldName The name of the field.
	 * @param val The value to look for.
	 * @return true if an equal value has been found.
	 */
	private boolean findValue(Document doc, String fieldName, String val) {
		String[] vals = doc.getValues(fieldName);
		for (String v : vals) {
			if (v.equals(val)) {
				return true;
			}
		}

		return false;
	}

	/* this is applied whenever we search for a qn.
	 * XXX do we actually want case-insensitive search on URI?
	 * LuceneQueryBuilder.normalizeValue() sort of enforces/suggests this.
	 * Locale.ROOT keeps the result independent of the JVM's default locale
	 * (e.g. the Turkish dotless i), so the key is the same on every machine. */
	private String normalizeQN(String qn) {
		return qn.toLowerCase(Locale.ROOT);
	}

}